#include <sstream>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "BBS2chProxyHTML2Dat.h"
#include "stringEncodingConverter.h"
#include "utils.h"

// User-Agent override declared in another translation unit. When it is
// non-NULL, getHtmlFromURL() uses it in preference to the per-request value.
extern char *user_agent;

// printf-style logging function declared in another translation unit; the
// first argument is a log level.
extern void log_printf(int level, const char *format ...);
#ifdef _WIN32
// Express gmtime_r() through gmtime_s() on Windows; note that the two
// arguments are passed in the opposite order.
#define gmtime_r(a, b) gmtime_s(b, a)
#endif

// strptime() format used to parse the reformatted timestamp of the last post
// on the fallback path taken when curl_getdate() is not used.
static const char threadTimestampFmt[] = "%Y/%m/%d %H:%M:%S %Z";
// Weekday labels indexed by tm_wday; inserted into the date column of
// generated DAT lines.
static const char *wdays[7] = {
	"日",
	"月",
	"火",
	"水",
	"木",
	"金",
	"土"
};

/*
 * Decode a hex-encoded, XOR-obfuscated string.
 *
 * The first two hex digits of `encrypted` are the XOR key; every following
 * pair of hex digits is one output byte XORed with that key.
 *
 * decrypted: output buffer; must have room for strlen(encrypted)/2 bytes
 *            (the decoded bytes plus the terminating NUL).
 * encrypted: NUL-terminated hex string.
 * Returns the number of decoded bytes (excluding the terminating NUL).
 *
 * Inputs shorter than one hex pair decode to an empty string. Without this
 * guard an empty input would make encrypted[1] read past the terminator.
 */
static int decryptMail(unsigned char *decrypted, char *encrypted)
{
	unsigned char *out = decrypted;
	const size_t len = strlen(encrypted);
	if (len >= 2) {
		// "0x" + two hex digits + NUL, re-used for every pair.
		char hexPair[5] = "0x";
		hexPair[2] = encrypted[0];
		hexPair[3] = encrypted[1];
		const unsigned int key = strtol(hexPair, NULL, 16);
		for (size_t n = 2; n < len; n += 2) {
			hexPair[2] = encrypted[n];
			// For an odd-length input this is the terminating NUL, so the
			// trailing single digit is parsed on its own.
			hexPair[3] = encrypted[n+1];
			const unsigned int value = strtol(hexPair, NULL, 16);
			*out++ = value ^ key;
		}
	}
	*out = 0;
	return out - decrypted;
}

/*
 * Replace every occurrence of `oldValue` in `input` with `newValue`, in place.
 * The scan resumes right after each inserted replacement, so text produced by
 * a replacement is never matched again. An empty `oldValue` is a no-op.
 */
static void replaceAll(std::string &input, const std::string &oldValue, const std::string &newValue)
{
	if (oldValue.empty()) return;

	for (std::string::size_type at = input.find(oldValue, 0);
	     at != std::string::npos;
	     at = input.find(oldValue, at + newValue.size())) {
		input.replace(at, oldValue.size(), newValue);
	}
}

/*
 * Escape the five HTML-special characters of `input` in place.
 * '&' has to be handled first so that the ampersands introduced by the other
 * substitutions are not escaped a second time.
 */
static void escapeForHTML(std::string &input)
{
	static const char *const kEntities[][2] = {
		{ "&",  "&amp;"  },
		{ "<",  "&lt;"   },
		{ ">",  "&gt;"   },
		{ "\"", "&quot;" },
		{ "'",  "&#39;"  },
	};
	const size_t count = sizeof(kEntities) / sizeof(kEntities[0]);
	for (size_t k = 0; k < count; k++) {
		replaceAll(input, kEntities[k][0], kEntities[k][1]);
	}
}

// Builds the read.cgi base URL for the thread:
//   <scheme>://<host>/test/read.cgi/<board>/<key>/
// The scheme is https when useHttps is true, http otherwise.
BBS2chProxyHTML2Dat5ch::BBS2chProxyHTML2Dat5ch(BBS2chProxyThreadCache *cache, const BBS2chThreadIdentifier &identifier, bool useHttps, CURL *curl)
	: IBBS2chProxyHTML2Dat(cache, identifier, curl)
{
	if (useHttps) {
		_url.assign("https://");
	} else {
		_url.assign("http://");
	}
	_url.append(identifier.host);
	_url.append("/test/read.cgi/");
	_url.append(identifier.board).append("/");
	_url.append(identifier.key).append("/");
}

// Points _url at the talk.jp JSON API for the thread:
//   https://talk.jp/api/boards/<board>/threads/<key>
// The parsed-JSON cache starts out empty.
BBS2chProxyHTML2DatTalk::BBS2chProxyHTML2DatTalk(BBS2chProxyThreadCache *cache, const BBS2chThreadIdentifier &identifier, CURL *curl)
	: IBBS2chProxyHTML2Dat(cache, identifier, curl), _cachedJson(NULL)
{
	_url.assign("https://talk.jp/api/boards/");
	_url.append(identifier.board);
	_url.append("/threads/");
	_url.append(identifier.key);
}

// Replaces the API URL set by the base class with the HTML page URL:
//   https://talk.jp/boards/<board>/<key>/
BBS2chProxyHTML2DatTalkHTML::BBS2chProxyHTML2DatTalkHTML(BBS2chProxyThreadCache *cache, const BBS2chThreadIdentifier &identifier, CURL *curl)
	: BBS2chProxyHTML2DatTalk(cache, identifier, curl)
{
	_url.assign("https://talk.jp/boards/");
	_url.append(identifier.board).append("/");
	_url.append(identifier.key).append("/");
}

// Replaces the URL set by the base class with the itest.5ch.net client API:
//   .../client.php?subdomain=<hostPrefix>&board=<board>&dat=<key>
BBS2chProxyHTML2Dat5chItest::BBS2chProxyHTML2Dat5chItest(BBS2chProxyThreadCache *cache, const BBS2chThreadIdentifier &identifier, CURL *curl)
	: BBS2chProxyHTML2DatTalk(cache, identifier, curl)
{
	_url.assign("https://itest.5ch.net/public/newapi/client.php?subdomain=");
	_url.append(identifier.hostPrefix);
	_url.append("&board=").append(identifier.board);
	_url.append("&dat=").append(identifier.key);
}

std::vector<char> IBBS2chProxyHTML2Dat::getHtmlFromURL(const std::string &url, long *outStatusCode)
{
	CURLcode res;
	long statusCode = 0;
	std::vector<char> html;
	configureCurlHandle(_curl);
	curl_easy_setopt(_curl, CURLOPT_URL, url.c_str());
	curl_easy_setopt(_curl, CURLOPT_ENCODING, "");
	curl_easy_setopt(_curl, CURLOPT_WRITEFUNCTION, write_callback_download);
	curl_easy_setopt(_curl, CURLOPT_WRITEDATA, &html);
	curl_easy_setopt(_curl, CURLOPT_FOLLOWLOCATION, 1L);
	if (user_agent) {
		curl_easy_setopt(_curl, CURLOPT_USERAGENT, user_agent);
	}
	else if (!_userAgent.empty()) {
		curl_easy_setopt(_curl, CURLOPT_USERAGENT, _userAgent.c_str());
	}
	res = curl_easy_perform(_curl);
	if (res == CURLE_OK) {
		curl_easy_getinfo(_curl, CURLINFO_RESPONSE_CODE, &statusCode);
		if (statusCode != 200) html.clear();
	} else {
		log_printf(0, "curl error: %s (%s)\n", curl_easy_strerror(res), url.c_str());
	}
	curl_easy_reset(_curl);
	if (outStatusCode) *outStatusCode = statusCode;
	return html;
}

// Remembers the User-Agent of the incoming request, if it has one, so that
// getHtmlFromURL() can forward it. Other headers are ignored.
void IBBS2chProxyHTML2Dat::setRequestHeaders(BBS2chProxyHttpHeaders &headers)
{
	if (!headers.has("User-Agent")) return;
	_userAgent = headers.get("User-Agent");
}

const std::string& IBBS2chProxyHTML2Dat::getKey()
{
	return _threadKey;
}

/*
 * Fetch the thread via read.cgi and convert it to DAT format.
 *
 * startFrom:       first post number wanted; values <= 1 request the whole
 *                  thread ("1-"), larger values request "<startFrom>-n".
 * lastModifiedOut: forwarded to html2dat().
 * useCache:        forwarded to html2dat().
 * outStatusCode:   optional; receives the HTTP status of the download.
 * Returns the DAT text, or an empty string when nothing could be downloaded
 * or converted.
 */
std::string BBS2chProxyHTML2Dat5ch::generateDatFrom(int startFrom, time_t *lastModifiedOut, bool useCache, long *outStatusCode)
{
	std::string tmpURL(_url);
	if (startFrom > 1) {
		std::ostringstream ss;
		ss << startFrom << "-n";
		tmpURL += ss.str();
	} else {
		tmpURL += "1-";
	}
	curl_easy_setopt(_curl, CURLOPT_COOKIE, "5chClassic=on");
	std::vector<char> html = getHtmlFromURL(tmpURL, outStatusCode);
	// html2dat() returns "" for an empty buffer anyway.
	if (html.empty()) return "";
	// html2dat() scans the buffer with strstr/strchr/strncmp/atoi, which need
	// a NUL-terminated string; terminate it like the other generateDatFrom
	// implementations do so those scans cannot run past the allocation.
	html.push_back(0);
	return html2dat(html, startFrom, lastModifiedOut, useCache);
}

// Produces DAT text from the talk.jp JSON API. The JSON document is fetched
// and parsed on the first call only and kept in _cachedJson afterwards.
// Returns an empty string when the download or the parse fails.
std::string BBS2chProxyHTML2DatTalk::generateDatFrom(int startFrom, time_t *lastModifiedOut, bool useCache, long *outStatusCode)
{
	if (_cachedJson == NULL) {
		std::vector<char> payload = getHtmlFromURL(_url, outStatusCode);
		if (payload.empty()) {
			return "";
		}
		// json_parse_string() expects a C string.
		payload.push_back(0);
		_cachedJson = json_parse_string(&payload.front());
		if (_cachedJson == NULL) {
			return "";
		}
	}
	return json2dat(_cachedJson, startFrom, lastModifiedOut, useCache);
}

std::string BBS2chProxyHTML2DatTalkHTML::generateDatFrom(int startFrom, time_t *lastModifiedOut, bool useCache, long *outStatusCode)
{
	if (!_cachedJson) {
		std::vector<char> html = getHtmlFromURL(_url, outStatusCode);
		if (html.empty()) return "";
		html.push_back(0);
		const char *ptr = strstr(&html.front(), "id=\"__NEXT_DATA__\"");
		if (ptr) {
			ptr += strlen("id=\"__NEXT_DATA__\"");
			while (*ptr != '>' && *ptr != 0) ptr++;
			if (*ptr) {
				const char *end = strstr(++ptr, "</script>");
				if (end) {
					std::string jsonStr(ptr, end-ptr);
					_cachedJson = json_parse_string(jsonStr.c_str());
				}
			}
		}
	}
	if (!_cachedJson) return "";
	JSON_Value *threadData = json_object_dotget_value(json_object(_cachedJson), "props.pageProps.threadData");
	return json2dat(threadData, startFrom, lastModifiedOut, useCache);
}

// Produces DAT text from the itest.5ch.net JSON API. The response is fetched
// and parsed once, then re-used from _cachedJson on later calls.
// Returns an empty string when the download or the parse fails.
std::string BBS2chProxyHTML2Dat5chItest::generateDatFrom(int startFrom, time_t *lastModifiedOut, bool useCache, long *outStatusCode)
{
	if (_cachedJson == NULL) {
		std::vector<char> response = getHtmlFromURL(_url, outStatusCode);
		if (response.empty()) return "";
		// Terminate the buffer so it can be handed over as a C string.
		response.push_back(0);
		_cachedJson = json_parse_string(&response.front());
	}
	if (_cachedJson != NULL) {
		return json2dat(_cachedJson, startFrom, lastModifiedOut, useCache);
	}
	return "";
}

/*
 * Convert the legacy read.cgi markup (posts introduced by "<dt>N ") to DAT
 * lines of the form name<>mail<>date<>body<>title, where the title is only
 * appended to post 1.
 *
 * html:         page buffer; it is modified in place while parsing.
 * startResNum:  number of the first post to convert.
 * lastModified: optional; set from the date field of the last converted post
 *               (interpreted as +0900/JST), or left at 0 when that date
 *               cannot be parsed.
 * useCache:     when true, the first converted post must equal the line
 *               stored in the thread cache, otherwise "" is returned.
 * Returns the DAT text, or "" when the title or the first post is not found.
 *
 * NOTE(review): none of the fixed-size buffers (title, name, mail, date,
 * encrypted, body) is bounds-checked, and most strstr/strchr results are
 * used without a NULL check, so this relies on well-formed input that is
 * NUL-terminated somewhere past the scanned region.
 */
std::string BBS2chProxyHTML2Dat5ch::html2dat_old(std::vector<char> &html, int startResNum, time_t *lastModified, bool useCache)
{
	char *ptr = &html.front();
	char *end = &html.back();
	std::string txt;
	int res = startResNum, i=0;
	char signature[32];
	char title[1024];
	int cachedSize = 0;
	// bbspink threads skip the whitespace/<br> normalisation applied below.
	bool bbspink = strstr(_threadKey.c_str(),"bbspink.com") ? true : false;
	
	// Copy the text between <title> and </title>.
	ptr = (char *)memmem_priv(ptr, end-ptr+1, "<title>", 7);
	if(!ptr) {
		return "";
	}
	ptr += 7;
	while(1) {
		if(*ptr == '<') {
			if(!strncasecmp(ptr,"</title>",8)) {
				ptr += 8;
				break;
			}
			else title[i++] = *ptr++;
		}
		else title[i++] = *ptr++;
	}
	title[i] = 0;
	
	// Every post starts with "<dt>N "; find the first requested one.
	snprintf(signature,32,"<dt>%d ",res);
	ptr = (char *)memmem_priv(ptr, end-ptr+1, signature, strlen(signature));
	if(!ptr) {
		return "";
	}
	
	// One allocation carved into per-field scratch areas:
	// body (65536) | mail (1024) | name (1024) | date (1024) | encrypted (2048)
	unsigned char *buffer = (unsigned char *)malloc(65536+1024+1024+1024+2048);
	if(!buffer) {
		return "";
	}
	
	unsigned char *body = buffer;
	char *mail = (char *)body + 65536;
	char *name = mail + 1024;
	char *date = name + 1024;
	char *encrypted = date + 1024;
	
	// One iteration per post.
	while(ptr < end) {
		//fprintf(stderr,"%s\n",signature);
		std::string resData;
		i=0;
		mail[0] = 0;
		ptr = strstr(ptr,signature);
		ptr += strlen(signature);
		// Skip to the first tag after the post number.
		while(*ptr != '<') ptr++;
		ptr++;
		// endStr is what follows "</b>" at the end of the name field.
		const char *endStr;
		if(*ptr == 'a' || *ptr == 'A') {
replay:
			// has mail
			while(*ptr != '"') ptr++;
			ptr++;
			if(!strncmp(ptr,"/cdn-cgi/l/email-protection#",28)) {
				// Obfuscated address: decode it, overwrite the HTML in front
				// of the current position with an equivalent
				// <a href="mailto:..."> tag and parse that tag again.
				ptr += 28;
				while(*ptr != '"' && *ptr != 'X') encrypted[i++] = *ptr++;
				encrypted[i] = 0;
				i = decryptMail((unsigned char *)mail,encrypted);
				int reconstruct_len = *ptr == 'X' ? i + 15 : i + 16;
				ptr -= reconstruct_len;
				char *start = ptr;
				memcpy(ptr, "<a href=\"mailto:", 16);
				ptr += 16;
				memcpy(ptr, mail, i);
				ptr = start;
				i=0;
				goto replay;
			}
			else {
				// Plain link: the mail field is the href without "mailto:".
				if(!strncmp(ptr,"mailto:",7)) ptr += 7;
				while(*ptr != '"') mail[i++] = *ptr++;
				mail[i] = 0;
			}
			endStr = "</a>";
		}
		else if(*ptr == 'b') {
			// The name starts directly with <b>; nothing follows </b>.
			endStr = NULL;
		}
		else {
			endStr = "</font>";
		}
		
		// Move to the start of the name text.
		if(endStr) {
			ptr = strstr(ptr,"<b>");
			ptr += 3;
		}
		else {
			ptr = strchr(ptr,'>');
			ptr++;
		}
		
		// Name field: copy until "</b>" (followed by endStr when set).
		i=0;
		while(1) {
			if(*ptr == '<') {
				if(!strncasecmp(ptr,"</b>",4) && (!endStr || !strncasecmp(ptr+4,endStr,strlen(endStr)))) {
					ptr += 4;
					if(endStr) ptr += strlen(endStr);
					break;
				}
				else if(!strncmp(ptr,"<span class=\"__cf_email__\"",26)) {
					// Obfuscated address inside the name: decode it inline
					// and skip to the end of the accompanying script.
					int j=0;
					ptr = strstr(ptr,"data-cfemail=\"");
					ptr += 14;
					while(*ptr != '"') encrypted[j++] = *ptr++;
					encrypted[j] = 0;
					j = decryptMail((unsigned char *)name+i,encrypted);
					i += j;
					ptr = strstr(ptr,"</script>");
					ptr += 9;
				}
				else name[i++] = *ptr++;
			}
			else name[i++] = *ptr++;
		}
		
		resData.append(name, i);
		resData.append("<>");
		if(mail[0]) resData.append(mail);
		resData.append("<>");
		
		// Date field: skip two characters, then copy until "<dd>".
		ptr += 2;
		i=0;
		while(1) {
			if(*ptr == '<') {
				if(!strncasecmp(ptr,"<dd>",4)) {
					ptr += 4;
					break;
				}
				else if(!strncmp(ptr,"<a href=\"javascript:be(",23)) {
					// BE link: emit "BE:<argument of be()>-<link text>".
					memcpy(date+i,"BE:",3);
					ptr += 23;
					i += 3;
					while(*ptr != ')') date[i++] = *ptr++;
					date[i++] = '-';
					ptr = strchr(ptr,'?');
					ptr++;
					char *tmp = strstr(ptr,"</a>");
					memcpy(date+i,ptr,tmp-ptr);
					i += tmp-ptr;
					ptr = tmp + 4;
				}
				else date[i++] = *ptr++;
			}
			else date[i++] = *ptr++;
		}
		
		resData.append(date, i);
		resData.append("<>");
		
		// Body field: copy until "<br><br>\n", the next <dt> or </dl>.
		i=0;
		while(1) {
			if(*ptr == '<') {
				if(!strncasecmp(ptr,"<br><br>\n",9)) {
					ptr += 9;
					break;
				}
				else if(!strncasecmp(ptr,"<dt>",4) || !strncasecmp(ptr,"</dl>",5)) {
					// Drop trailing newlines already copied into the body.
					while(i>0 &&body[i-1] == '\n') i--;
					break;
				}
				else if(!strncmp(ptr,"<span class=\"__cf_email__\"",26) || !strncmp(ptr,"<a class=\"__cf_email__\"",23)) {
					// Obfuscated address inside the body: decode it inline.
					int j=0;
					ptr = strstr(ptr,"data-cfemail=\"");
					ptr += 14;
					while(*ptr != '"') encrypted[j++] = *ptr++;
					encrypted[j] = 0;
					j = decryptMail(body+i,encrypted);
					i += j;
					ptr = strstr(ptr,"</script>");
					ptr += 9;
				}
				else if(!strncmp(ptr,"<a href=\"http",13)) {
					// External link: keep only the link text.
					ptr = strchr(ptr,'>');
					ptr++;
					char *link = ptr;
					ptr = strstr(link,"</a>");
					memcpy(body+i,link,ptr-link);
					i += ptr-link;
					ptr += 4;
				}
				else if(!strncmp(ptr,"<img src=\"",10)) {
					// Image: keep only the URL; URLs on the listed hosts are
					// rewritten to start with "sssp:" followed by the URL
					// from its first '/'.
					ptr += 10;
					char *img = ptr;
					ptr = strstr(img,"\">");
					memcpy(body+i,img,ptr-img);
					if(memmem_priv(img,ptr-img,"/img.2ch.net",12) || memmem_priv(img,ptr-img,"/img.5ch.net",12) || memmem_priv(img,ptr-img,"/o.8ch.net",10) || memmem_priv(img,ptr-img,"/o.5ch.net",10)) {
						int length = ptr-img;
						while(*img != '/') {
							img++;
							length--;
						}
						memcpy(body+i,"sssp:",5);
						memcpy(body+i+5,img,length);
						i += length + 5;
					}
					else i += ptr-img;
					ptr += 2;
				}
				else if(!bbspink && !strncmp(ptr,"<br>",4)) {
					// When the body already ends in "<br> ", add a leading
					// space so consecutive breaks become "<br>  <br>".
					if(i>5 && !strncmp((char *)body+i-5,"<br> ",5)) {
						memcpy(body+i," <br>",5);
						i += 5;
					}
					else {
						memcpy(body+i,"<br>",4);
						i += 4;
					}
					ptr += 4;
				}
				else body[i++] = *ptr++;
			}
			else if(!bbspink && *ptr == ' ') {
				// Collapse runs of spaces to a single space.
				if(*(ptr+1) == ' ') ptr++;
				else body[i++] = *ptr++;
			}
			else body[i++] = *ptr++;
		}
		
		resData.append((const char *)body ,i);
		resData.append("<>");
		if(res == 1) resData.append(title);
		resData.append("\n");
		
		// Cache validation: the first converted post must be identical to
		// the line remembered for this thread, otherwise give up.
		if(useCache && res == startResNum) {
			PBBS2chProxyThreadInfo info = _threadCache->pop(_threadKey);
			bool hit = false;
			if(info) {
				log_printf(5,"cache hit");
				if(info->cachedData.size() == resData.size()) {
					log_printf(5,"... size match");
					if(info->cachedData == resData) {
						log_printf(5,"... content match");
						hit = true;
						cachedSize = info->cachedSize - resData.size();
					}
				}
				log_printf(5,"\n");
			}
			if(!hit) {
				free(buffer);
				return "";
			}
		}
		
		txt += resData;
		res++;
		while(*ptr == '\n' || *ptr == '\r') ptr++;
		snprintf(signature,32,"<dt>%d ",res);
		if(!memmem_priv(ptr, end-ptr+1, signature, strlen(signature))) {
			// No further post: remember the last line for the next request.
			PBBS2chProxyThreadInfo info(new BBS2chProxyThreadInfo());
			info->lastResNum = res-1;
			info->cachedSize = txt.size()+cachedSize;
			info->cachedData = resData;
			_threadCache->set(_threadKey, info);
			log_printf(5,"cached thread %s (%ld bytes)\n",_threadKey.c_str(),resData.size());
			
			if(lastModified) {
				// Parse "Y/M/D<anything> H:M:S" from the last post's date.
				// Every `break` below leaves the outer loop with
				// *lastModified still 0.
				*lastModified = 0;
				char formattedDate[256];
				// Shadows the outer ptr; this one walks the date field.
				char *ptr;
				ptr = date;
				int year = strtol(ptr,&ptr,10);
				if(*ptr != '/') break;
				ptr++;
				int month = strtol(ptr,&ptr,10);
				if(*ptr != '/') break;
				ptr++;
				int day = strtol(ptr,&ptr,10);
				if(!*ptr) break;
				// Skip whatever follows the day up to the next space.
				while(*ptr != ' ' && *ptr != 0) ptr++;
				if(!*ptr) break;
				ptr++;
				int hour = strtol(ptr,&ptr,10);
				if(*ptr != ':') break;
				ptr++;
				int minutes = strtol(ptr,&ptr,10);
				if(*ptr != ':') break;
				ptr++;
				int seconds = strtol(ptr,&ptr,10);
				if(!(month>0 && month<13) || !(day>0 && day<32)) break;
				// Two-digit years are taken to be 20xx.
				if(year < 100) year += 2000;
#if LIBCURL_VERSION_NUM >= 0x070c02 /* curl 7.12.2 or later */
				snprintf(formattedDate, 256, "%d%02d%02d %02d:%02d:%02d +0900", year, month, day, hour, minutes, seconds);
				*lastModified = curl_getdate(formattedDate, NULL);
#else
				snprintf(formattedDate,256,"%d/%d/%d %02d:%02d:%02d JST",year,month,day,hour,minutes,seconds);
				struct tm time = {0};
				strptime(formattedDate,threadTimestampFmt,&time);
				*lastModified = mktime(&time);
#endif
			}
			//fprintf(stderr,"not found,%ld\n",end-ptr+1);
			break;
		}
	}
	
	free(buffer);
	return txt;
}

/*
 * Convert a read.cgi HTML page to DAT lines of the form
 * name<>mail<>date<>body<>title (the title is only appended to post 1).
 *
 * Three page layouts are recognised:
 *   - "new" HTML, detected by an element with id="threadtitle"; posts are
 *     <article id="N"> or <div id="N"> elements carrying a data-date attribute;
 *   - HTML with <h1 class="title">, where posts are elements with
 *     class="post" id="N";
 *   - anything else is handed to html2dat_old().
 *
 * html:         page buffer; it is modified in place while parsing.
 * startResNum:  number of the first post to convert.
 * lastModified: optional; set from the date field of the last converted post
 *               (interpreted as +0900/JST), or left at 0 when that date
 *               cannot be parsed.
 * useCache:     when true, the first converted post must equal the line
 *               stored in the thread cache, otherwise "" is returned.
 * Returns the DAT text, or "" when the page cannot be parsed.
 *
 * Missing post numbers are filled with "broken" placeholder lines.
 *
 * NOTE(review): the preamble (title, tag names, signature) is bounds-checked,
 * but the per-post loop still copies into name/mail/date/encrypted/body and
 * postContentTag without length checks and dereferences several
 * strstr/strchr results without a NULL check.
 */
std::string BBS2chProxyHTML2Dat5ch::html2dat(std::vector<char> &html, int startResNum, time_t *lastModified, bool useCache)
{
	// front()/back() are undefined on an empty vector, so check first.
	if (html.empty()) return "";
	char *ptr = &html.front();
	char *end = &html.back();
	std::string txt;
	int res = startResNum, i=0;
	char signature[64];
	char title[1024];
	int cachedSize = 0;
	char signatureTag[32];
	char closeTag[48];
	int closeTagLen;
	bool isNewHTML = false;
	
	ptr = (char *)memmem_priv(ptr, end-ptr+1, " id=\"threadtitle\">", 18);
	if (ptr) {
		isNewHTML = true;
		char *ptr2 = (char *)memmem_priv(ptr, end-ptr+1, "<article id=\"", 13);
		if (!ptr2) {
			strcpy(signatureTag, "div");
		} else {
			strcpy(signatureTag, "article");
		}
		// Walk back to the '<' opening the title element to learn its tag
		// name, without leaving the buffer.
		const char *tmp = ptr;
		while (tmp > &html.front() && *tmp != '<') tmp--;
		// "</" + tag name + ">" + NUL has to fit into closeTag.
		if (*tmp != '<' || (int)(ptr-tmp)+2 >= (int)sizeof(closeTag)) return "";
		memcpy(closeTag+2, tmp+1, ptr-tmp-1);
		closeTag[0] = '<';
		closeTag[1] = '/';
		closeTag[ptr-tmp+1] = '>';
		closeTag[ptr-tmp+2] = 0;
		closeTagLen = strlen(closeTag);
		ptr += 18;
		// Copy the title up to its closing tag or the end of the line;
		// anything beyond the capacity of `title` is dropped.
		while (ptr < end) {
			if (*ptr == '<' && !strncasecmp(ptr, closeTag, closeTagLen)) {
				ptr += closeTagLen;
				break;
			}
			if (*ptr == '\n') break;
			if (i < (int)sizeof(title)-1) title[i++] = *ptr;
			ptr++;
		}
		title[i] = 0;

		snprintf(signature, sizeof(signature), "<%s id=\"%d\"", signatureTag, res);
	}
	else {
		ptr = &html.front();
		ptr = (char *)memmem_priv(ptr, end-ptr+1, "<h1 class=\"title\">", 18);
		if(!ptr) {
			return html2dat_old(html, startResNum, lastModified, useCache);
		}
		else {
			char *ptr2 = (char *)memmem_priv(ptr, end-ptr+1, " class=\"post\"", 13);
			if(ptr2) {
				// Capture "<tag" of the first post element. The backward scan
				// stops at the '<' of the <h1> at the latest, because ptr2
				// was found after it.
				char *tmp = ptr2;
				*ptr2 = 0;
				while(*ptr2 != '<') ptr2--;
				if(strlen(ptr2) >= sizeof(signatureTag)) {
					*tmp = ' ';
					return "";
				}
				strcpy(signatureTag, ptr2);
				*tmp = ' ';
			}
			else {
				return "";
			}
			/*char *ptr2 = (char *)memmem_priv(ptr, end-ptr+1, "<dl class=\"post\"", 16);
			if(ptr2) {
				return html2dat_pink(html, startResNum, lastModified, useCache);
			}*/
		}
		
		ptr += 18;
		// Copy the title up to </h1> or the end of the line; anything beyond
		// the capacity of `title` is dropped.
		while(ptr < end) {
			if(*ptr == '<' && !strncasecmp(ptr,"</h1>",5)) {
				ptr += 5;
				break;
			}
			if(*ptr == '\n') break;
			if(i < (int)sizeof(title)-1) title[i++] = *ptr;
			ptr++;
		}
		title[i] = 0;
		
		snprintf(signature,sizeof(signature),"%s class=\"post\" id=\"%d\"",signatureTag,res);
	}
	// Locate the first requested post.
	ptr = (char *)memmem_priv(ptr, end-ptr+1, signature, strlen(signature));
	if(!ptr) {
		return "";
	}
	
	// One allocation carved into per-field scratch areas:
	// body (65536) | mail (1024) | name (1024) | date (1024) | encrypted (2048)
	unsigned char *buffer = (unsigned char *)malloc(65536+1024+1024+1024+2048);
	if(!buffer) {
		return "";
	}
	
	unsigned char *body = buffer;
	char *mail = (char *)body + 65536;
	char *name = mail + 1024;
	char *date = name + 1024;
	char *encrypted = date + 1024;
	
	// One iteration per post.
	while(ptr < end) {
		//fprintf(stderr,"%s\n",signature);
		std::string resData;
		i=0;
		mail[0] = 0;
		if (isNewHTML) ptr = strstr(ptr," class=\"postusername\"><b>");
		else ptr = strstr(ptr," class=\"name\"><b>");
		if(ptr) {
			// Derive the closing tag of the name element from its opening tag.
			char *tmp = ptr;
			*ptr = 0;
			while(*ptr != '<') ptr--;
			snprintf(closeTag, 48, "</%s>", ptr+1);
			closeTagLen = strlen(closeTag);
			if (isNewHTML) ptr = tmp + 25;
			else ptr = tmp + 17;
		}
		else {
			break;
		}
		
		// endStr is the markup that terminates the name text.
		char endStr[64];
		if(!strncmp(ptr,"<a ", 3)) {
			char *tmp = ptr;
			while (*tmp != '>') tmp++;
			ptr = (char *)memmem_priv(ptr, tmp-ptr, "href=\"", 6);
			if (!ptr) {
				// Anchor without href: there is no mail to extract.
				ptr = tmp;
				goto mailEnd;
			}
replay:
			// has mail
			while(*ptr != '"') ptr++;
			ptr++;
			if(!strncmp(ptr,"/cdn-cgi/l/email-protection#",28)) {
				// Obfuscated address: decode it, overwrite the HTML in front
				// of the current position with an equivalent
				// <a href="mailto:..."> tag and parse that tag again.
				ptr += 28;
				while(*ptr != '"' && *ptr != 'X') encrypted[i++] = *ptr++;
				encrypted[i] = 0;
				i = decryptMail((unsigned char *)mail,encrypted);
				int reconstruct_len = *ptr == 'X' ? i + 15 : i + 16;
				ptr -= reconstruct_len;
				char *start = ptr;
				memcpy(ptr, "<a href=\"mailto:", 16);
				ptr += 16;
				memcpy(ptr, mail, i);
				ptr = start;
				i=0;
				goto replay;
			}
			else {
				if(!strncmp(ptr,"mailto:",7)) ptr += 7;
				// Copy the mail up to the closing quote; a link nested in the
				// attribute value is replaced by its link text.
				while(1) {
					if(*ptr == '<' && !strncmp(ptr,"<a href=\"",9)) {
						ptr = strchr(ptr,'>');
						ptr++;
						char *link = ptr;
						ptr = strstr(link,"</a>");
						memcpy(mail+i,link,ptr-link);
						i += ptr-link;
						ptr += 4;
					}
					else if(*ptr == '"') break;
					else mail[i++] = *ptr++;
				}
				//while(*ptr != '"') mail[i++] = *ptr++;
				mail[i] = 0;
			}
mailEnd:
			snprintf(endStr,64,"</a></b>%s",closeTag);
			// Skip the rest of the opening <a ...> tag.
			while(*ptr != '>') ptr++;
			ptr++;
		}
		/* we do not have to handle this special case because read.cgi on bbspink doesn't
		   emit font tags anymore and it conflicts with text decorations using "melon point" */
		/*else if(!strncmp(ptr,"<font",5)) {
			snprintf(endStr,64,"</font></b>%s",closeTag);
			while(*ptr != '>') ptr++;
			ptr++;
		}*/
		else {
			snprintf(endStr,64,"</b>%s",closeTag);
		}
		
		// Name field: copy until endStr.
		i=0;
		while(1) {
			if(*ptr == '<') {
				if(!strncmp(ptr,endStr,strlen(endStr))) {
					ptr += strlen(endStr);
					break;
				}
				else if(!strncmp(ptr,"<span class=\"__cf_email__\"",26)) {
					// Obfuscated address inside the name: decode it inline
					// and skip to the end of the accompanying script.
					int j=0;
					ptr = strstr(ptr,"data-cfemail=\"");
					ptr += 14;
					while(*ptr != '"') encrypted[j++] = *ptr++;
					encrypted[j] = 0;
					j = decryptMail((unsigned char *)name+i,encrypted);
					i += j;
					ptr = strstr(ptr,"</script>");
					ptr += 9;
				}
				else if(!strncmp(ptr,"<a href=\"",9)) {
					// Link inside the name: keep only the link text.
					ptr = strchr(ptr,'>');
					ptr++;
					char *link = ptr;
					ptr = strstr(link,"</a>");
					memcpy(name+i,link,ptr-link);
					i += ptr-link;
					ptr += 4;
				}
				else name[i++] = *ptr++;
			}
			else name[i++] = *ptr++;
		}
		
		resData.append(name, i);
		resData.append("<>");
		if(mail[0]) resData.append(mail);
		resData.append("<>");
		
		// Date field: the element carrying class="date".
		ptr = strstr(ptr," class=\"date\">");
		if(ptr) {
			char *tmp = ptr;
			*ptr = 0;
			while(*ptr != '<') ptr--;
			snprintf(closeTag, 48, "</%s>", ptr+1);
			closeTagLen = strlen(closeTag);
			ptr = tmp + 14;
		}
		else {
			break;
		}
		
		i=0;
		while(1) {
			if(*ptr == '<') {
				if(!strncasecmp(ptr,closeTag,closeTagLen)) {
					ptr += closeTagLen;
					break;
				}
				else date[i++] = *ptr++;
			}
			else date[i++] = *ptr++;
		}
		
		// Optional uid element: appended to the date after a space.
		if(!strncmp(ptr,"<div class=\"uid",15) || !strncmp(ptr,"<span class=\"uid",16)) {
			char *tmp = ptr+1;
			while(*ptr != ' ') ptr++;
			*ptr = 0;
			snprintf(closeTag, 48, "</%s>", tmp);
			closeTagLen = strlen(closeTag);
			ptr += 11;
			while(*ptr != '>') ptr++;
			ptr++;
			date[i++] = ' ';
			while(1) {
				if(*ptr == '<') {
					if(!strncasecmp(ptr,closeTag,closeTagLen)) {
						ptr += closeTagLen;
						break;
					}
					else date[i++] = *ptr++;
				}
				else date[i++] = *ptr++;
			}
		}

		if (isNewHTML && !strncmp(ptr, "</span>", 7)) ptr += 7;
		
		// Optional BE element: appended as " BE:<user id>-<link text>".
		if(!strncmp(ptr,"<div class=\"be",14) || !strncmp(ptr,"<span class=\"be",15)) {
			ptr += 14;
			while(*ptr != '>') ptr++;
			ptr++;
			if(!strncmp(ptr,"<a href=\"",9)) {
				ptr += 9;
				while(*ptr != '/' && *ptr != '"') ptr++;
				if(*ptr == '/' && (!strncmp(ptr,"//be.2ch.net/user/",18) || !strncmp(ptr,"//be.5ch.net/user/",18))) {
					memcpy(date+i," BE:",4);
					i += 4;
					ptr += 18;
					while(*ptr != '"') date[i++] = *ptr++;
					date[i++] = '-';
					ptr = strchr(ptr,'?');
					ptr++;
					char *tmp = strstr(ptr,"</a>");
					memcpy(date+i,ptr,tmp-ptr);
					i += tmp-ptr;
					ptr = tmp + 4;
				}
			}
		}
		
		resData.append(date, i);
		resData.append("<>");
		
		// Find the start of the body and work out which markup closes it.
		if (isNewHTML) {
			ptr = strstr(ptr," class=\"post-content\">");
			if (!ptr) {
				break;
			}
			else {
				char *tmp = ptr;
				char postContentTag[32];
				while (*tmp != '<') tmp--;
				memcpy(postContentTag, tmp+1, ptr-tmp-1);
				postContentTag[ptr-tmp-1] = 0;
				ptr += 22;
				if (!strncasecmp(ptr, "<span class=\"AA\">", 17)) {
					snprintf(closeTag, 48, "</span></%s>", postContentTag);
					closeTagLen = strlen(closeTag);
					ptr += 17;
				}
				else {
					snprintf(closeTag, 48, "</%s>", postContentTag);
					closeTagLen = strlen(closeTag);
				}
			}
		}
		else if(!strcmp(signatureTag,"<div")) {
			ptr = strstr(ptr,"<div class=\"message\">");
			if(!ptr) {
				break;
			}
			else {
				ptr += 21;
				if(!strncasecmp(ptr,"<span class=\"escaped\">",22)) {
					if(!strncasecmp(ptr+22,"<span class=\"AA\">",17)) {
						strcpy(closeTag,"</span></span></div>");
						closeTagLen = 20;
						ptr += 22+17;
					}
					else {
						strcpy(closeTag,"</span></div>");
						closeTagLen = 13;
						ptr += 22;
					}
				}
				else {
					strcpy(closeTag,"</div>");
					closeTagLen = 6;
				}
			}
		}
		else {
			ptr = strstr(ptr,"<dd class=\"thread_in\">");
			if(!ptr) {
				break;
			}
			strcpy(closeTag,"</dd>");
			closeTagLen = 5;
			ptr += 22;
		}
		// Body field: copy until closeTag, rewriting selected markup.
		i=0;
		while(1) {
			if(*ptr == '<') {
				if(!strncasecmp(ptr,closeTag,closeTagLen)) {
					ptr += closeTagLen;
					break;
				}
				else if(!strncmp(ptr,"<span class=\"__cf_email__\"",26) || !strncmp(ptr,"<a class=\"__cf_email__\"",23)) {
					// Obfuscated address inside the body: decode it inline.
					int j=0;
					ptr = strstr(ptr,"data-cfemail=\"");
					ptr += 14;
					while(*ptr != '"') encrypted[j++] = *ptr++;
					encrypted[j] = 0;
					j = decryptMail(body+i,encrypted);
					i += j;
					ptr = strstr(ptr,"</script>");
					ptr += 9;
				}
				else if(!strncmp(ptr,"<a ",3)) {
					char *tmp = strchr(ptr,'>');
					char *href = (char *)memmem_priv(ptr,tmp-ptr,"href=\"",6);
					char *link = tmp+1;
					if(href && !strncmp(link,"&gt;&gt;",8) && memmem_priv(href,link-href,"test/read.cgi/",14)) {
						// Anchor to another post: keep the opening tag but
						// drop its class attribute. The link text and </a>
						// are copied by later iterations.
						while(ptr < link) {
							if(!strncmp(ptr," class=\"",8)) {
								ptr += 8;
								while(*ptr != '"' && *ptr != '>') ptr++;
								if(*ptr == '"') ptr++;
							}
							else body[i++] = *ptr++;
						}
					}
					else {
						// Any other link: keep only the link text.
						ptr = strstr(link,"</a>");
						memcpy(body+i,link,ptr-link);
						i += ptr-link;
						ptr += 4;
					}
				}
				else if(!strncmp(ptr,"<img src=\"",10)) {
					// Image: keep only the URL; URLs on the listed hosts are
					// rewritten to start with "sssp:" followed by the URL
					// from its first '/'.
					ptr += 10;
					char *img = ptr;
					ptr = strstr(img,"\">");
					memcpy(body+i,img,ptr-img);
					if(memmem_priv(img,ptr-img,"/img.2ch.net",12) || memmem_priv(img,ptr-img,"/img.5ch.net",12) || memmem_priv(img,ptr-img,"/o.8ch.net",10)  || memmem_priv(img,ptr-img,"/o.5ch.net",10)) {
						int length = ptr-img;
						while(*img != '/') {
							img++;
							length--;
						}
						memcpy(body+i,"sssp:",5);
						memcpy(body+i+5,img,length);
						i += length + 5;
					}
					else i += ptr-img;
					ptr += 2;
				}
				else if(!strncmp(ptr,"<br>",4)) {
					// When the body already ends in "<br> ", add a leading
					// space so consecutive breaks become "<br>  <br>".
					if(i>5 && !strncmp((char *)body+i-5,"<br> ",5)) {
						memcpy(body+i," <br>",5);
						i += 5;
					}
					else {
						memcpy(body+i,"<br>",4);
						i += 4;
					}
					ptr += 4;
				}
				else body[i++] = *ptr++;
			}
			else body[i++] = *ptr++;
		}
		
		resData.append((const char *)body, i);
		resData.append("<>");
		if(res == 1) resData.append(title);
		resData.append("\n");
		
		// Cache validation: the first converted post must be identical to
		// the line remembered for this thread, otherwise give up.
		if(useCache && res == startResNum) {
			PBBS2chProxyThreadInfo info = _threadCache->pop(_threadKey);
			bool hit = false;
			if(info) {
				log_printf(5,"cache hit");
				if(info->cachedData.size() == resData.size()) {
					log_printf(5,"... size match");
					if(info->cachedData == resData) {
						log_printf(5,"... content match");
						hit = true;
						cachedSize = info->cachedSize - resData.size();
					}
				}
				log_printf(5,"\n");
			}
			if(!hit) {
				free(buffer);
				return "";
			}
		}
		
		txt += resData;
		res++;
		while(*ptr == '\n' || *ptr == '\r') ptr++;
		// Look for the next post element, whatever its number is. In the new
		// HTML only elements carrying a data-date attribute count as posts.
		if (isNewHTML) snprintf(signature, 64, "<%s id=\"", signatureTag);
		else snprintf(signature,64,"%s class=\"post\" id=\"",signatureTag);
		while (1) {
			ptr = (char *)memmem_priv(ptr, end-ptr+1, signature, strlen(signature));
			if (!isNewHTML || !ptr) break;
			char *tmp = ptr;
			while (*tmp != '>') tmp++;
			tmp = (char *)memmem_priv(ptr, tmp-ptr, "data-date", 9);
			if (tmp) break;
			ptr++;
		}
		if(ptr) {
			int next = atoi(ptr+strlen(signature));
			if(next >= res) {
				// Fill gaps in the numbering with placeholder lines.
				while(next > res) {
					txt += "broken<><>broken<> broken <>\n";
					res++;
				}
			}
			// A number going backwards ends the conversion.
			else ptr = NULL;
		}
		if(!ptr) {
			// No further post: remember the last line for the next request.
			PBBS2chProxyThreadInfo info(new BBS2chProxyThreadInfo());
			info->lastResNum = res-1;
			info->cachedSize = txt.size()+cachedSize;
			info->cachedData = resData;
			_threadCache->set(_threadKey, info);
			log_printf(5,"cached thread %s (%ld bytes)\n",_threadKey.c_str(),resData.size());
			
			if(lastModified) {
				// Parse "Y/M/D<anything> H:M:S" from the last post's date.
				// Every `break` below leaves the outer loop with
				// *lastModified still 0.
				*lastModified = 0;
				char formattedDate[256];
				// Shadows the outer ptr; this one walks the date field.
				char *ptr;
				ptr = date;
				int year = strtol(ptr,&ptr,10);
				if(*ptr != '/') break;
				ptr++;
				int month = strtol(ptr,&ptr,10);
				if(*ptr != '/') break;
				ptr++;
				int day = strtol(ptr,&ptr,10);
				if(!*ptr) break;
				// Skip whatever follows the day up to the next space.
				while(*ptr != ' ' && *ptr != 0) ptr++;
				if(!*ptr) break;
				ptr++;
				int hour = strtol(ptr,&ptr,10);
				if(*ptr != ':') break;
				ptr++;
				int minutes = strtol(ptr,&ptr,10);
				if(*ptr != ':') break;
				ptr++;
				int seconds = strtol(ptr,&ptr,10);
				if(!(month>0 && month<13) || !(day>0 && day<32)) break;
				// Two-digit years are taken to be 20xx.
				if(year < 100) year += 2000;
#if LIBCURL_VERSION_NUM >= 0x070c02 /* curl 7.12.2 or later */
				snprintf(formattedDate, 256, "%d%02d%02d %02d:%02d:%02d +0900", year, month, day, hour, minutes, seconds);
				*lastModified = curl_getdate(formattedDate, NULL);
#else
				snprintf(formattedDate,256,"%d/%d/%d %02d:%02d:%02d JST",year,month,day,hour,minutes,seconds);
				struct tm time = {0};
				strptime(formattedDate,threadTimestampFmt,&time);
				*lastModified = mktime(&time);
#endif
			}
			//fprintf(stderr,"not found,%ld\n",end-ptr+1);
			break;
		}
	}
	
	free(buffer);
	return txt;
}

/*
 * Convert talk.jp thread JSON to Shift_JIS DAT lines.
 *
 * json:            object holding data.title, data.comments and, optionally,
 *                  data.quote_source.
 * startFrom:       first post number to emit (values below 1 are treated as 1).
 * lastModifiedOut: optional; receives the largest comment timestamp seen
 *                  among the emitted posts.
 * useCache:        when true, the post numbered startFrom must equal the line
 *                  stored in the thread cache, otherwise "" is returned.
 * Returns the DAT text, or "" when the JSON lacks a title or comments.
 *
 * Gaps in the numbering, and lines that cannot be converted to Shift_JIS, are
 * emitted as "broken" placeholder lines. The last emitted line is stored in
 * the thread cache.
 */
std::string BBS2chProxyHTML2DatTalk::json2dat(JSON_Value *json, int startFrom, time_t *lastModifiedOut, bool useCache)
{
	std::string out;
	if (json == NULL || json_type(json) != JSONObject) {
		return "";
	}
	JSON_Object *root = json_object(json);
	const char *title = json_object_dotget_string(root, "data.title");
	const char *quoteSource = json_object_dotget_string(root, "data.quote_source");
	JSON_Array *comments = json_object_dotget_array(root, "data.comments");
	if (title == NULL || comments == NULL) {
		return "";
	}
	if (startFrom < 1) startFrom = 1;

	int prevNumber = startFrom - 1;
	time_t newestTimestamp = 0;
	size_t cachedSize = 0;
	std::string lastLine;
	const size_t commentCount = json_array_get_count(comments);
	for (size_t idx = 0; idx < commentCount; idx++) {
		JSON_Object *comment = json_array_get_object(comments, idx);
		if (comment == NULL) continue;
		const int number = json_object_get_number(comment, "number");
		if (number < startFrom) continue;

		const char *name = json_object_dotget_string(comment, "writer.name");
		const char *trip = json_object_dotget_string(comment, "writer.trip");
		const char *slip = json_object_dotget_string(comment, "writer.slip");
		const char *id = json_object_dotget_string(comment, "writer.id");
		const time_t timestamp = json_object_get_number(comment, "timestamp");
		const char *body = json_object_get_string(comment, "body");
		if (newestTimestamp < timestamp) newestTimestamp = timestamp;

		// Placeholder lines for numbers missing before this comment.
		for (int missing = prevNumber + 1; missing < number; missing++) {
			out += "broken<><>broken<> broken <>\n";
		}

		std::stringstream line;

		// Name column, with optional trip and slip decorations.
		if (!name) {
			line << "削除";
		}
		else {
			std::string escapedName(name);
			escapeForHTML(escapedName);
			line << escapedName;
			if (trip) line << "</b>◆" << trip << "<b>";
			if (slip) line << " </b>(" << slip << ")<b>";
		}
		line << "<><>"; //mail cannot be obtained from json!

		// Date column: the timestamp shifted by 9 hours (32400 s) and broken
		// down with gmtime_r, followed by the writer id when there is one.
		if (!timestamp) {
			line << "削除";
		}
		else {
			char formatted[256] = "";
			struct tm shifted_tm = {0};
			time_t shifted = timestamp + 32400;
			gmtime_r(&shifted, &shifted_tm);
			strftime(formatted, 256, "%Y/%m/%d(", &shifted_tm);
			line << formatted << wdays[shifted_tm.tm_wday] << ") ";
			strftime(formatted, 256, "%H:%M:%S", &shifted_tm);
			line << formatted;
			if (id) {
				line << " ID:" << id;
			}
		}
		line << "<>";

		// Body column: escaped, newlines turned into " <br> ", padded with a
		// space on both sides. Post 1 also names the quote source.
		if (!body) {
			line << "削除";
		}
		else {
			std::string escapedBody(body);
			escapeForHTML(escapedBody);
			replaceAll(escapedBody, "\n", " <br> ");
			line << " " << escapedBody;
			if (number == 1 && quoteSource) {
				line << " <br>  <br> 出典 " << quoteSource;
			}
			line << " ";
		}
		line << "<>";

		// Title column, only on post 1.
		if (number == 1) {
			std::string escapedTitle(title);
			escapeForHTML(escapedTitle);
			line << escapedTitle;
		}
		line << "\n";
		prevNumber = number;

		char *lineSJIS = convertUTF8ToShiftJISWithNCR(line.str().c_str(), line.str().size());
		if (!lineSJIS) {
			lastLine = "broken<><>broken<> broken <>\n";
		}
		else {
			lastLine = lineSJIS;
			free(lineSJIS);
		}
		out += lastLine;

		// Cache validation on the post numbered startFrom.
		if (useCache && number == startFrom) {
			PBBS2chProxyThreadInfo info = _threadCache->pop(_threadKey);
			bool hit = false;
			if (info) {
				log_printf(5, "cache hit");
				if (info->cachedData.size() == lastLine.size()) {
					log_printf(5, "... size match");
					if (info->cachedData == lastLine) {
						log_printf(5, "... content match");
						hit = true;
						cachedSize = info->cachedSize - lastLine.size();
					}
				}
				log_printf(5, "\n");
			}
			if (!hit) {
				return "";
			}
		}
	}

	// Remember the last emitted line for the next incremental request.
	if (!lastLine.empty()) {
		PBBS2chProxyThreadInfo info(new BBS2chProxyThreadInfo());
		info->lastResNum = prevNumber;
		info->cachedSize = out.size() + cachedSize;
		info->cachedData = lastLine;
		_threadCache->set(_threadKey, info);
		log_printf(5, "cached thread %s (%ld bytes)\n", _threadKey.c_str(), lastLine.size());
	}
	if (lastModifiedOut) *lastModifiedOut = newestTimestamp;
	return out;
}

/*
 * Convert itest.5ch.net thread JSON to Shift_JIS DAT lines.
 *
 * json:            object with a "thread" array (index 0: number used as the
 *                  last-modified time, index 3: string inserted into anchor
 *                  URLs, index 5: title) and a "comments" array whose entries
 *                  are arrays of
 *                  [number, name, mail, date, id, be, body].
 * startFrom:       first post number to emit (values below 1 are treated as 1).
 * lastModifiedOut: optional; receives thread[0].
 * useCache:        when true, the post numbered startFrom must equal the line
 *                  stored in the thread cache, otherwise "" is returned.
 * Returns the DAT text, or "" when the title or the comments are missing.
 *
 * "&gt;&gt;N" references in the body are wrapped in read.cgi anchors. Gaps in
 * the numbering, and lines that cannot be converted to Shift_JIS, are emitted
 * as "broken" placeholder lines. The last emitted line is stored in the
 * thread cache.
 */
std::string BBS2chProxyHTML2Dat5chItest::json2dat(JSON_Value *json, int startFrom, time_t *lastModifiedOut, bool useCache)
{
	std::string out;
	if (!json || json_type(json) != JSONObject) {
		return "";
	}
	JSON_Object *root = json_object(json);
	JSON_Array *threadMeta = json_object_get_array(root, "thread");
	time_t lastModified = json_array_get_number(threadMeta, 0);
	const char *boardAndKey = json_array_get_string(threadMeta, 3);
	const char *title = json_array_get_string(threadMeta, 5);
	JSON_Array *comments = json_object_get_array(root, "comments");
	if (!title || !*title || !comments) {
		return "";
	}
	if (startFrom < 1) startFrom = 1;
	int prevNumber = startFrom - 1;
	size_t cachedSize = 0;
	std::string lastLine;
	for (size_t i=0, length=json_array_get_count(comments); i<length; i++) {
		std::stringstream line;
		JSON_Array *comment = json_array_get_array(comments, i);
		if (!comment) continue;
		int number = json_array_get_number(comment, 0);
		if (number < startFrom) continue;
		const char *name = json_array_get_string(comment, 1);
		const char *mail = json_array_get_string(comment, 2);
		const char *date = json_array_get_string(comment, 3);
		const char *id = json_array_get_string(comment, 4);
		const char *be = json_array_get_string(comment, 5);
		const char *body = json_array_get_string(comment, 6);
		// Placeholder lines for numbers missing before this comment.
		for (int j=prevNumber+1; j<number; j++) {
			out += "broken<><>broken<> broken <>\n";
		}
		if (name) line << name;
		else line << "削除";
		line << "<>";
		if (mail) line << mail;
		else line << "削除";
		line << "<>";
		if (date) {
			line << date;
			if (id && *id) {
				line << " ID:" << id;
			}
			if (be && *be) {
				line << " BE:" << be;
			}
		}
		else line << "削除";
		line << "<>";
		if (body) {
			// Walk over every "&gt;&gt;" and link those followed by a
			// positive number.
			const char *ptr = strstr(body, "&gt;&gt;");
			const char *start = body;
			while (ptr) {
				const char *tmp = ptr;
				unsigned int num = strtoul(ptr+8, (char **)&ptr, 10);
				// Streaming a NULL const char* is undefined and in practice
				// puts the stream into a failed state, losing the rest of the
				// line; without a board/key the reference stays plain text.
				if (num > 0 && boardAndKey) {
					if (tmp != start) line << std::string(start, tmp-start);
					line << "<a href=\"../test/read.cgi/" << boardAndKey << "/" << num << "\" rel=\"noopener noreferrer\" target=\"_blank\">";
					line << std::string(tmp, ptr-tmp);
					line << "</a>";
				}
				else line << std::string(start, ptr-start);
				start = ptr;
				ptr = strstr(start, "&gt;&gt;");
			}
			line << start;
		}
		else line << "削除";
		line << "<>";
		// Title column, only on post 1.
		if (number == 1) {
			line << title;
		}
		line << "\n";
		prevNumber = number;
		char *lineSJIS = convertUTF8ToShiftJISWithNCR(line.str().c_str(), line.str().size());
		if (lineSJIS) {
			lastLine = lineSJIS;
			out += lastLine;
			free(lineSJIS);
		} else {
			lastLine = "broken<><>broken<> broken <>\n";
			out += lastLine;
		}
		// Cache validation on the post numbered startFrom.
		if (useCache && startFrom == number) {
			PBBS2chProxyThreadInfo info = _threadCache->pop(_threadKey);
			bool hit = false;
			if (info) {
				log_printf(5, "cache hit");
				if (info->cachedData.size() == lastLine.size()) {
					log_printf(5, "... size match");
					if (info->cachedData == lastLine) {
						log_printf(5, "... content match");
						hit = true;
						cachedSize = info->cachedSize - lastLine.size();
					}
				}
				log_printf(5, "\n");
			}
			if (!hit) {
				return "";
			}
		}
	}
	// Remember the last emitted line for the next incremental request.
	if (!lastLine.empty()) {
		PBBS2chProxyThreadInfo info(new BBS2chProxyThreadInfo());
		info->lastResNum = prevNumber;
		info->cachedSize = out.size() + cachedSize;
		info->cachedData = lastLine;
		_threadCache->set(_threadKey, info);
		// %ld expects a long; size_t is a different type on some platforms.
		log_printf(5, "cached thread %s (%ld bytes)\n", _threadKey.c_str(), (long)lastLine.size());
	}
	if (lastModifiedOut) *lastModifiedOut = lastModified;
	return out;
}
